{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Occupation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Introduction:\n",
    "\n",
    "Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.\n",
    "\n",
    "### Step 1. Import the necessary libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/u.user). "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 3. Assign it to a variable called users."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>age</th>\n",
       "      <th>gender</th>\n",
       "      <th>occupation</th>\n",
       "      <th>zip_code</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>user_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>24</td>\n",
       "      <td>M</td>\n",
       "      <td>technician</td>\n",
       "      <td>85711</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>53</td>\n",
       "      <td>F</td>\n",
       "      <td>other</td>\n",
       "      <td>94043</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>23</td>\n",
       "      <td>M</td>\n",
       "      <td>writer</td>\n",
       "      <td>32067</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>24</td>\n",
       "      <td>M</td>\n",
       "      <td>technician</td>\n",
       "      <td>43537</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>33</td>\n",
       "      <td>F</td>\n",
       "      <td>other</td>\n",
       "      <td>15213</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         age gender  occupation zip_code\n",
       "user_id                                 \n",
       "1         24      M  technician    85711\n",
       "2         53      F       other    94043\n",
       "3         23      M      writer    32067\n",
       "4         24      M  technician    43537\n",
       "5         33      F       other    15213"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 4. Discover what is the mean age per occupation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "occupation\n",
       "administrator    38.746835\n",
       "artist           31.392857\n",
       "doctor           43.571429\n",
       "educator         42.010526\n",
       "engineer         36.388060\n",
       "entertainment    29.222222\n",
       "executive        38.718750\n",
       "healthcare       41.562500\n",
       "homemaker        32.571429\n",
       "lawyer           36.750000\n",
       "librarian        40.000000\n",
       "marketing        37.615385\n",
       "none             26.555556\n",
       "other            34.523810\n",
       "programmer       33.121212\n",
       "retired          63.071429\n",
       "salesman         35.666667\n",
       "scientist        35.548387\n",
       "student          22.081633\n",
       "technician       33.148148\n",
       "writer           36.311111\n",
       "Name: age, dtype: float64"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 5. Discover the Male ratio per occupation and sort it from the most to the least"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 150,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "doctor           100.000000\n",
       "engineer          97.014925\n",
       "technician        96.296296\n",
       "retired           92.857143\n",
       "programmer        90.909091\n",
       "executive         90.625000\n",
       "scientist         90.322581\n",
       "entertainment     88.888889\n",
       "lawyer            83.333333\n",
       "salesman          75.000000\n",
       "educator          72.631579\n",
       "student           69.387755\n",
       "other             65.714286\n",
       "marketing         61.538462\n",
       "writer            57.777778\n",
       "none              55.555556\n",
       "administrator     54.430380\n",
       "artist            53.571429\n",
       "librarian         43.137255\n",
       "healthcare        31.250000\n",
       "homemaker         14.285714\n",
       "dtype: float64"
      ]
     },
     "execution_count": 150,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 6. For each occupation, calculate the minimum and maximum ages"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 151,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>min</th>\n",
       "      <th>max</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>occupation</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>administrator</th>\n",
       "      <td>21</td>\n",
       "      <td>70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>artist</th>\n",
       "      <td>19</td>\n",
       "      <td>48</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>doctor</th>\n",
       "      <td>28</td>\n",
       "      <td>64</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>educator</th>\n",
       "      <td>23</td>\n",
       "      <td>63</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>engineer</th>\n",
       "      <td>22</td>\n",
       "      <td>70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>entertainment</th>\n",
       "      <td>15</td>\n",
       "      <td>50</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>executive</th>\n",
       "      <td>22</td>\n",
       "      <td>69</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>healthcare</th>\n",
       "      <td>22</td>\n",
       "      <td>62</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>homemaker</th>\n",
       "      <td>20</td>\n",
       "      <td>50</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>lawyer</th>\n",
       "      <td>21</td>\n",
       "      <td>53</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>librarian</th>\n",
       "      <td>23</td>\n",
       "      <td>69</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>marketing</th>\n",
       "      <td>24</td>\n",
       "      <td>55</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>none</th>\n",
       "      <td>11</td>\n",
       "      <td>55</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>other</th>\n",
       "      <td>13</td>\n",
       "      <td>64</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>programmer</th>\n",
       "      <td>20</td>\n",
       "      <td>63</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>retired</th>\n",
       "      <td>51</td>\n",
       "      <td>73</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>salesman</th>\n",
       "      <td>18</td>\n",
       "      <td>66</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>scientist</th>\n",
       "      <td>23</td>\n",
       "      <td>55</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>student</th>\n",
       "      <td>7</td>\n",
       "      <td>42</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>technician</th>\n",
       "      <td>21</td>\n",
       "      <td>55</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>writer</th>\n",
       "      <td>18</td>\n",
       "      <td>60</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               min  max\n",
       "occupation             \n",
       "administrator   21   70\n",
       "artist          19   48\n",
       "doctor          28   64\n",
       "educator        23   63\n",
       "engineer        22   70\n",
       "entertainment   15   50\n",
       "executive       22   69\n",
       "healthcare      22   62\n",
       "homemaker       20   50\n",
       "lawyer          21   53\n",
       "librarian       23   69\n",
       "marketing       24   55\n",
       "none            11   55\n",
       "other           13   64\n",
       "programmer      20   63\n",
       "retired         51   73\n",
       "salesman        18   66\n",
       "scientist       23   55\n",
       "student          7   42\n",
       "technician      21   55\n",
       "writer          18   60"
      ]
     },
     "execution_count": 151,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 7. For each combination of occupation and gender, calculate the mean age"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 152,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "occupation     gender\n",
       "administrator  F         40.638889\n",
       "               M         37.162791\n",
       "artist         F         30.307692\n",
       "               M         32.333333\n",
       "doctor         M         43.571429\n",
       "educator       F         39.115385\n",
       "               M         43.101449\n",
       "engineer       F         29.500000\n",
       "               M         36.600000\n",
       "entertainment  F         31.000000\n",
       "               M         29.000000\n",
       "executive      F         44.000000\n",
       "               M         38.172414\n",
       "healthcare     F         39.818182\n",
       "               M         45.400000\n",
       "homemaker      F         34.166667\n",
       "               M         23.000000\n",
       "lawyer         F         39.500000\n",
       "               M         36.200000\n",
       "librarian      F         40.000000\n",
       "               M         40.000000\n",
       "marketing      F         37.200000\n",
       "               M         37.875000\n",
       "none           F         36.500000\n",
       "               M         18.600000\n",
       "other          F         35.472222\n",
       "               M         34.028986\n",
       "programmer     F         32.166667\n",
       "               M         33.216667\n",
       "retired        F         70.000000\n",
       "               M         62.538462\n",
       "salesman       F         27.000000\n",
       "               M         38.555556\n",
       "scientist      F         28.333333\n",
       "               M         36.321429\n",
       "student        F         20.750000\n",
       "               M         22.669118\n",
       "technician     F         38.000000\n",
       "               M         32.961538\n",
       "writer         F         37.631579\n",
       "               M         35.346154\n",
       "Name: age, dtype: float64"
      ]
     },
     "execution_count": 152,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 8.  For each occupation present the percentage of women and men"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 154,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "occupation     gender\n",
       "administrator  F          45.569620\n",
       "               M          54.430380\n",
       "artist         F          46.428571\n",
       "               M          53.571429\n",
       "doctor         M         100.000000\n",
       "educator       F          27.368421\n",
       "               M          72.631579\n",
       "engineer       F           2.985075\n",
       "               M          97.014925\n",
       "entertainment  F          11.111111\n",
       "               M          88.888889\n",
       "executive      F           9.375000\n",
       "               M          90.625000\n",
       "healthcare     F          68.750000\n",
       "               M          31.250000\n",
       "homemaker      F          85.714286\n",
       "               M          14.285714\n",
       "lawyer         F          16.666667\n",
       "               M          83.333333\n",
       "librarian      F          56.862745\n",
       "               M          43.137255\n",
       "marketing      F          38.461538\n",
       "               M          61.538462\n",
       "none           F          44.444444\n",
       "               M          55.555556\n",
       "other          F          34.285714\n",
       "               M          65.714286\n",
       "programmer     F           9.090909\n",
       "               M          90.909091\n",
       "retired        F           7.142857\n",
       "               M          92.857143\n",
       "salesman       F          25.000000\n",
       "               M          75.000000\n",
       "scientist      F           9.677419\n",
       "               M          90.322581\n",
       "student        F          30.612245\n",
       "               M          69.387755\n",
       "technician     F           3.703704\n",
       "               M          96.296296\n",
       "writer         F          42.222222\n",
       "               M          57.777778\n",
       "Name: gender, dtype: float64"
      ]
     },
     "execution_count": 154,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
